Introduction

This is a notebook documenting our analysis of two single-cell RNA-sequencing (scRNA-seq) samples for a novel in vitro human Extra-Embryoid (hEE) model. This work was originally published in Pedroza, Gassaloglu et al. 2022. The data used in this analysis is available at GEO accession GSE208195.

Required libraries.

library(Seurat, verbose = FALSE, warn.conflicts = FALSE) # Basic single-cell analysis
library(ggplot2) # General plot functions
library(gridExtra) # Arrange plots
library(dplyr) # Pipe operator
library(matrixStats) # Variance explained by PCs
library(clustree) # Evaluate clustering robustness
library(EnhancedVolcano) # Volcano plots
library(ggbeeswarm) # Randomize dots in dotplots
library(slingshot) # Trajectory inference
library(scater) # Plotting gene expression over pseudotime
library(SingleCellExperiment) # SingleCellExperiment object class
library(gridExtra) # Arranging plots

Quality control and initial analysis

Loading raw data and creating Seurat Objects.

# Set data directory for convenience
path <- "L:/Smith_Lab/Collaborations/Sozen_lab/"

# Read in 10X output
d4 <- Read10X(data.dir = paste0(path,"Data_Submission/D4/filtered_feature_bc_matrix"))
d6 <- Read10X(data.dir = paste0(path,"Data_Submission/D6/filtered_feature_bc_matrix"))

# Create Seurat objects, append metadata and barcodes with timepoint information
d4 <- CreateSeuratObject(counts = d4, project = "D4", min.cells = 5, min.features = 50)
d4$timepoint <- "D4"
d4 <- RenameCells(d4, add.cell.id = "D4")
d6 <- CreateSeuratObject(counts = d6, project = "D6", min.cells = 5, min.features = 50)
d6$timepoint <- "D6"
d6 <- RenameCells(d6, add.cell.id = "D6")

print(paste0("Cells in D4: ", ncol(d4)))
## [1] "Cells in D4: 10943"
print(paste0("Cells in D6: ", ncol(d6)))
## [1] "Cells in D6: 11483"

First round of quality control. Samples were merged for simplicity in this notebook, as quality metrics were consistently distributed across the data. Thresholds were determined empirically based on these distributions and metadata statistics.

merged <- merge(d4, d6) # Merge data

rm(d4, d6) # memory allocation
gc(verbose=FALSE)
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   7585744  405.2   12796312  683.4   8760633  467.9
## Vcells 268311353 2047.1  733395842 5595.4 654655289 4994.7
merged[["percent.mt"]] <- PercentageFeatureSet(merged, pattern = "^MT-") # Annotate mitochondrial content

table(merged$orig.ident) # Number of cells in each condition
## 
##    D4    D6 
## 10943 11483
summary(merged$nCount_RNA) # Transcript number
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     499    9317   12556   13467   16676   96844
summary(merged$nFeature_RNA) # Number of unique genes expressed
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      88    3222    3870    3796    4580    8803
summary(merged$percent.mt) # Percent of transcripts of mitochondrial origin
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.786   3.044   5.186   4.619  88.132
sd(merged$nCount_RNA) 
## [1] 7602.871
sd(merged$nFeature_RNA)
## [1] 1362.026
sd(merged$percent.mt) 
## [1] 9.229145
p1 <- ggplot(merged@meta.data, mapping = aes(x = nFeature_RNA)) + geom_histogram(bins = 120) + geom_vline(aes(xintercept = 2000), color = 'red') + NoLegend()
p2 <- ggplot(merged@meta.data, mapping = aes(x = percent.mt)) + geom_histogram(bins = 120) + geom_vline(aes(xintercept = 10, color = 'red')) + NoLegend()
p3 <- ggplot(merged@meta.data, mapping = aes(x = nCount_RNA)) + geom_histogram(bins = 120) + geom_vline(aes(xintercept = 31000, color = 'red')) + NoLegend()
print(p1 + p2 + p3)

After applying QC thresholds, distribution of quality metrics is roughly normal. Another round of cluster based QC is applied downstream.

# Subset on QC thresholds
merged.f <- subset(merged, subset = percent.mt < 10 & nFeature_RNA > 2000 & nCount_RNA < 27500)

rm(merged) # memory allocation
gc(verbose=FALSE)
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   7701672  411.4   12796312  683.4  12796312  683.4
## Vcells 241539554 1842.9  733395842 5595.4 689465310 5260.3
p1 <- ggplot(merged.f@meta.data, mapping = aes(x = nFeature_RNA)) + geom_histogram(bins = 120) + geom_vline(aes(xintercept = 2000), color = 'red') + NoLegend()
p2 <- ggplot(merged.f@meta.data, mapping = aes(x = percent.mt)) + geom_histogram(bins = 120) + geom_vline(aes(xintercept = 10, color = 'red')) + NoLegend()
p3 <- ggplot(merged.f@meta.data, mapping = aes(x = nCount_RNA)) + geom_histogram(bins = 120) + geom_vline(aes(xintercept = 27500, color = 'red')) + NoLegend()
print(p1 + p2 + p3) 

Initial analysis revealed two major compartments shared between D4 and D6 but separated by PC1. Interestingly, there was also substantial separation between D4 and D6 samples along PC2 and in UMAP. Of the top 50 loadings for PC2, 10% were mitochondrial genes, suggesting a potential batch effect.

# Standard preprocessing
merged.f <- merged.f %>% 
  NormalizeData(verbose = FALSE) %>% 
  FindVariableFeatures(verbose = FALSE) %>% 
  ScaleData(features = VariableFeatures(merged.f), verbose = FALSE) %>%
  RunPCA(verbose = FALSE) %>%
  FindNeighbors(dims = 1:20, verbose = FALSE) %>%
  RunUMAP(dims = 1:20, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
p1 <- DimPlot(merged.f, reduction = "pca", group.by = "timepoint", shuffle = TRUE) # Plot PCA biplot for PC1/PC2
p2 <- DimPlot(merged.f, reduction = "umap", group.by = "timepoint", shuffle = TRUE) # Plot PCA biplot for PC1/PC2

p3 <- ElbowPlot(merged.f) # Scree plot for PCA

# Calculate % of total variance explained by each PC (50 total)
percent_var = ((merged.f@reductions$pca@stdev)^2 / sum(rowVars(merged.f@assays$RNA@scale.data)))*100
print(percent_var[1:2]) # Variance explained by PC1 and PC2
## [1] 1.155934 0.159855
print(p1 + p2 + p3)

print(DimHeatmap(merged.f, dims = 1:2, nfeatures = 50, balanced = TRUE, reduction = "pca")) # Heatmap for top 50 loadings of PC1 and PC2

## NULL

After further analysis, we decided to perform data integration on the D4 and D6 samples using Reciprocal PCA (RPCA). RPCA is described in Andreatta and Carmona, 2021. It is recommended for use in integration where not all cell types are present in two or more samples generated with a common technology (10X). Given the possibility of temporal differences such as progenitor pool depletion or cell type differentiation in the 48hr interval, we opted for this technique instead of Canonical Correlation Analysis (CCA), for example. We also attempted to regress out variance derived from mitochondrial genes and cell cycle differences (not all cell cycle variance), retaining substantial variation in one major compartment due to phase. Initial gene expression projection onto the UMAP revealed that the major compartments may represent distinct embryonic (SOX2/HESX1+) and extra-embryonic (GATA3/6+) populations, each with some internal heterogeneity that may have been obscured by the great differences between them.

merge.list <- SplitObject(merged.f, split.by = "orig.ident") # Split merged object for integration

rm(merged.f) # memory allocation
gc(verbose=FALSE)
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    7817081   417.5   12796312   683.4   12796312   683.4
## Vcells 1398321531 10668.4 2007829575 15318.6 1661770211 12678.4
merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- NormalizeData(x, verbose = FALSE)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
})

features <- SelectIntegrationFeatures(object.list = merge.list)

merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})

merge.anchors <- FindIntegrationAnchors(object.list = merge.list, anchor.features = features, reduction = "rpca", verbose = FALSE) # Identify similar cells between D4 and D6 using reciprocal PCA
merge.integrated <- IntegrateData(anchorset = merge.anchors, verbose = FALSE) # Integrate on anchors

rm(merge.list, merge.anchors)
gc(verbose=FALSE)
##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   7841117  418.8   12796312   683.4   12796312   683.4
## Vcells 964285631 7357.0 2409475490 18382.9 2263191686 17266.8
DefaultAssay(merge.integrated) <- "integrated"

# Cell (CC) cycle scoring based on known genes
s.genes <- cc.genes$s.genes 
g2m.genes <- cc.genes$g2m.genes
merge.integrated <- CellCycleScoring(merge.integrated,  s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
## Warning: The following features are not present in the object: TYMS, FEN1,
## MCM2, RRM1, GINS2, MCM6, MLF1IP, RFC2, RPA2, RAD51AP1, GMNN, WDR76, SLBP, CCNE2,
## UBR7, POLD3, MSH2, RAD51, CDC45, EXO1, TIPIN, DSCC1, BLM, CASP8AP2, USP1, POLA1,
## CHAF1B, BRIP1, E2F8, not searching for symbol synonyms
## Warning: The following features are not present in the object: TMPO, FAM64A,
## HN1, CDC25C, RANGAP1, LBR, CTCF, GAS2L3, CBX5, not searching for symbol synonyms
merge.integrated$CC.Difference <- merge.integrated$S.Score - merge.integrated$G2M.Score # Calculate CC difference

merge.integrated <- ScaleData(merge.integrated, verbose = FALSE, vars.to.regress = c("CC.Difference", "percent.mt")) 
merge.integrated <- RunPCA(merge.integrated, npcs = 30, verbose = FALSE) 
merge.integrated <- RunUMAP(merge.integrated, dims = 1:30, verbose = FALSE)
merge.integrated <- FindNeighbors(merge.integrated, dims = 1:30, verbose = FALSE)
merge.integrated <- FindClusters(merge.integrated, resolution = 0.1, verbose = FALSE) # Low resolution to identify major compartments


p1 <- DimPlot(merge.integrated, reduction = "umap", group.by = "orig.ident", shuffle = TRUE) + ggtitle("Timepoint")
p2 <- DimPlot(merge.integrated, reduction = "umap", group.by = "integrated_snn_res.0.1", label = TRUE, repel = TRUE, shuffle = "TRUE")
p3 <- DimPlot(merge.integrated, reduction = "umap", group.by = "Phase", label = TRUE, repel = TRUE, shuffle = "TRUE")
print(p1 + p2 + p3)

print(FeaturePlot(merge.integrated, features = c("GATA3", "GATA6", "SOX2", "HESX1"), order = TRUE, ncol = 2))

To further classify these major compartments, we attempted label transfer using an in vivo human embryo scRNA-seq reference from Xiang et al. 2019. Consistent with our preliminary observations, the major compartments were classified as “Epiblast” and “Hypoblast” using this approach. We harnessed these classifications to subset our data for sub-clustering.

xiang <- readRDS(paste0(path, "Xiang/xiang_seurat.rds"))

x_anchors <- FindTransferAnchors(reference = xiang, query = merge.integrated, dims = 1:30, reference.reduction = "pca", k.filter = NA, verbose = FALSE) # Too few cells to filter anchors
predictions_xiang <- TransferData(anchorset = x_anchors, refdata = xiang$Cell_type, dims = 1:30, verbose = FALSE)

merge.integrated <- AddMetaData(merge.integrated, metadata = predictions_xiang)
merge.integrated$predicted.id <- paste0("Xiang_", merge.integrated$predicted.id)

rm(predictions_xiang, x_anchors, xiang)
gc(verbose=FALSE)
##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   7903045  422.1   12796312   683.4   12796312   683.4
## Vcells 313341259 2390.7 1927580392 14706.3 2263191686 17266.8
p1 <- DimPlot(merge.integrated, group.by = "predicted.id", cols = c("#20B169", "#E28E44")) + ggtitle("Label Transfer")
print(p1)

Subclustering

Having identified the two major compartments, we set out to describe the heterogeneity within them through sub-clustering. First, the compartment identified as Epiblast was isolated, integrated with RPCA, and clustered. We observed fairly robust clustering at the 0.35-0.45 resolution range, and opted for the lower end (0.35) for initial exploration. The finer resolution also allowed us to identify artifact communities with consistent clustering and poor quality metrics, which were filtered out in sequential steps described below.

# Subset on label transfer identity
Idents(merge.integrated) <- merge.integrated$predicted.id
epi <- subset(merge.integrated, idents = "Xiang_EPI")

# Split subset by time point
DefaultAssay(epi) <- "RNA"
epi <- SplitObject(epi, "orig.ident")

# Preprocessing
epi <- lapply(X = epi, FUN = function(x) {
    x <- NormalizeData(x, verbose = FALSE)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
})

# Integration
epi_features <- SelectIntegrationFeatures(object.list = epi, verbose = FALSE)

epi <- lapply(X = epi, FUN = function(x) {
    x <- ScaleData(x, features = epi_features, verbose = FALSE)
    x <- RunPCA(x, features = epi_features, verbose = FALSE)
})

epi_anchors <- FindIntegrationAnchors(object.list = epi, anchor.features = epi_features, reduction = "rpca", verbose = FALSE)
epi <- IntegrateData(anchorset = epi_anchors, verbose = FALSE)

rm(epi_anchors, epi_features)
gc(verbose = FALSE)
##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   7893002  421.6   12796312   683.4   12796312   683.4
## Vcells 453886878 3462.9 1542064314 11765.1 2263191686 17266.8
# Preprocessing
DefaultAssay(epi) <- "integrated"
epi <- ScaleData(epi, vars.to.regress = c("percent.mt", "CC.Difference"), verbose = FALSE)
epi <- RunPCA(epi, verbose = FALSE)
epi <- RunUMAP(epi, dims = 1:30, verbose = FALSE)

# Subclustering
epi <- FindNeighbors(epi, dims = 1:30, verbose = FALSE)
epi <- FindClusters(epi, resolution = seq(0.2, 1, 0.05), verbose = FALSE) # Identify constant clusters

print(clustree(epi)) # Clustering tree

# Analysis
epi$sub_clusters <- paste0("Epi_", epi$integrated_snn_res.0.35)
p1 <- DimPlot(epi, shuffle = TRUE, group.by = "sub_clusters", label = TRUE)
p2 <- DimPlot(epi, split.by = "orig.ident", group.by = "sub_clusters")
print(p1 + p2)

print(VlnPlot(epi, group.by = "sub_clusters", features = c("nFeature_RNA", "nCount_RNA", "percent.mt")))

In a second pass through the Epiblast compartment, we observed a large community that was both removed from the main structure, positive for Hypoblast marker genes (GATA3, GATA6), and displayed close to twice as high average transcript count and unique feature expression compared to other cells in this subset. We decided to aggregate this population into one community using the clustree to identify a suitable resolution, ultimately opting for 0.2 and filtering out this subcluster as potential doublets.

# Filter
epi <- subset(epi, sub_clusters == "Epi_8", invert = TRUE)

# Reprocess
DefaultAssay(epi) <- "integrated"
epi <- ScaleData(epi, vars.to.regress = c("percent.mt", "CC.Difference"), verbose = FALSE)
epi <- RunPCA(epi, npcs = 30, verbose = FALSE)
epi <- RunUMAP(epi, reduction = "pca", dims = 1:30, verbose = FALSE)

# Sub-cluster analysis
epi <- FindNeighbors(epi, reduction = "pca", dims = 1:30, verbose = FALSE)
epi <- FindClusters(epi, resolution = seq(0.2,1, 0.05), verbose = FALSE)

print(clustree(epi))

epi$sub_clusters <- paste0("Epi_", epi$integrated_snn_res.0.2)
p1 <- DimPlot(epi, shuffle = TRUE, group.by = "sub_clusters", label = TRUE)
p2 <- DimPlot(epi, split.by = "orig.ident", group.by = "sub_clusters")
p3 <- FeaturePlot(epi, features = "nFeature_RNA")

print(p1 + p2 + p3)

print(VlnPlot(epi, group.by = "sub_clusters", features = c("nFeature_RNA", "nCount_RNA", "percent.mt")))

print(FeaturePlot(epi, features = c("GATA3", "GATA6"), order = TRUE))

At this stage, we observed that many of the larger communities were split from the main structure in this subset, suggesting overclustering. We again selected a resolution of 0.35 as a starting point and identified marker genes for each cluster. We noticed that, indeed, sub-clusters 0, 1 and 2 had very few specific markers genes, indicating redundancy in the clustering, even at relatively low resolution. These clusters were consolidated into the reported “PI-Epi” cluster.

# Filter
epi <- subset(epi, sub_clusters == "Epi_2", invert = TRUE)

# Reprocess
DefaultAssay(epi) <- "integrated"
epi <- ScaleData(epi, vars.to.regress = c("percent.mt", "CC.Difference"), verbose = FALSE)
epi <- RunPCA(epi, npcs = 30, verbose = FALSE)
epi <- RunUMAP(epi, reduction = "pca", dims = 1:30, verbose = FALSE)

# Sub-cluster analysis
epi <- FindNeighbors(epi, reduction = "pca", dims = 1:30, verbose = FALSE)
epi <- FindClusters(epi, resolution = seq(0.2,1, 0.05), verbose = FALSE)

print(clustree(epi))

epi$sub_clusters <- paste0("Epi_", epi$integrated_snn_res.0.35)
p1 <- DimPlot(epi, shuffle = TRUE, group.by = "sub_clusters", label = TRUE) + ggtitle("Epiblast subclusters")
p2 <- DimPlot(epi, split.by = "orig.ident", group.by = "sub_clusters")
print(p1 + p2)

print(VlnPlot(epi, group.by = "sub_clusters", features = c("nFeature_RNA", "nCount_RNA")))

# Marker identification
Idents(epi) <- epi$sub_clusters
DefaultAssay(epi) <- "RNA"
epi <- ScaleData(epi, verbose = FALSE)
epi_markers <- FindAllMarkers(epi, only.pos = TRUE, verbose = FALSE)
epi_markers$pct.diff <- epi_markers$pct.1 - epi_markers$pct.2
epi_markers <- epi_markers %>%
  filter(p_val_adj < 1e-10 & abs(pct.diff) > 0.2)

top30_markers_epi <- epi_markers %>% 
  group_by(cluster) %>%
    slice_max(n = 30, order_by = avg_log2FC)

print(DoHeatmap(epi, features = top30_markers_epi$gene) + NoLegend())

Next, we set out to characterize the Hypoblast compartment. Given the large amount of variance explained by the cell cycle in this population, we expected a lower degree of heterogeneity. Nevertheless, we proceeded with a similar strategy as previously described. Following from cluster tree analysis, we noticed a consistent small cluster with abnormal quality metrics and removed from the main structure. We opted to remove this cluster as potential doublets. Interestingly, even major clusters seemed to be primarily separated by cell cycle phase, suggesting a persistent population of slow-cycling G1 cells, and another of G2/S phase cells.

# Subset
Idents(merge.integrated) <- merge.integrated$predicted.id
hypo <- subset(merge.integrated, idents = "Xiang_Hypoblast")

# Split object by time point
DefaultAssay(hypo) <- "RNA"
hypo <- SplitObject(hypo, "orig.ident")

# Preprocessing
hypo <- lapply(X = hypo, FUN = function(x) {
    x <- NormalizeData(x, verbose = FALSE)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
})

# Integration
hypo_features <- SelectIntegrationFeatures(object.list = hypo)

hypo <- lapply(X = hypo, FUN = function(x) {
    x <- ScaleData(x, features = hypo_features, verbose = FALSE)
    x <- RunPCA(x, features = hypo_features, verbose = FALSE)
})

hypo_anchors <- FindIntegrationAnchors(object.list = hypo, anchor.features = hypo_features, reduction = "rpca")
## Scaling features for provided objects
## Computing within dataset neighborhoods
## Finding all pairwise anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 3341 anchors
hypo <- IntegrateData(anchorset = hypo_anchors)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
rm(hypo_anchors)
gc(verbose=FALSE)
##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   8120880  433.8   12796312   683.4   12796312   683.4
## Vcells 829347147 6327.5 1850557176 14118.7 2263191686 17266.8
# Pre-processing
DefaultAssay(hypo) <- "integrated"
hypo <- ScaleData(hypo, vars.to.regress = c("percent.mt", "CC.Difference"), verbose = FALSE)
hypo <- RunPCA(hypo, npcs = 30, verbose = FALSE)
hypo <- RunUMAP(hypo, reduction = "pca", dims = 1:30, verbose = FALSE)

# Sub-luster analysis
hypo <- FindNeighbors(hypo, reduction = "pca", dims = 1:30, verbose = FALSE)
hypo <- FindClusters(hypo, resolution = seq(0.1, 0.8,0.05), verbose = FALSE)

clustree(hypo)

hypo$sub_clusters <- paste0("Hypo_", hypo$integrated_snn_res.0.35)
p1 <- DimPlot(hypo, shuffle = TRUE, group.by = "sub_clusters")
p2 <- DimPlot(hypo, split.by = "orig.ident", group.by = "sub_clusters")
p3 <- DimPlot(hypo, group.by = "Phase", shuffle = TRUE)
p4 <- FeaturePlot(hypo, features = c("nFeature_RNA", "nCount_RNA"))
print(p1 + p2 + p3)

print(p4)

print(VlnPlot(hypo, features = c("nFeature_RNA", "nCount_RNA"), group.by = "sub_clusters"))

After removing the artifact, we moved to characterize the remaining populations. We opted for slight overclustering of the clear two populations divided by phase, choosing a resolution parameter of 0.35 for a total of 4 communities. Finally, we performed differential gene expression to resolve these clusters’ identities. Importantly, we observed a surprising subset of inhibitor genes ( CER1, LEFTY1, LEFTY2, SHISA2, NODAL2) was highly expressed the smallest sub-cluster. We determined that this pattern of expression was consistent with the Anterior Visceral Endoderm (AVE), and termed this population “AVE-like”.

# Filter
hypo <- subset(hypo, sub_clusters == "Hypo_3", invert = TRUE)

# Reprocess
DefaultAssay(hypo) <- "integrated"
hypo <- ScaleData(hypo, vars.to.regress = c("percent.mt", "CC.Difference"), verbose = FALSE)
hypo <- RunPCA(hypo, npcs = 30, verbose = FALSE)
hypo <- RunUMAP(hypo, reduction = "pca", dims = 1:30, verbose = FALSE)

# Sub-cluster analysis
hypo <- FindNeighbors(hypo, reduction = "pca", dims = 1:30, verbose = FALSE)
hypo <- FindClusters(hypo, resolution = seq(0.1, 0.6, 0.05), verbose = FALSE)

print(clustree(hypo))

# Plots
hypo$sub_clusters <- paste0("Hypo_", hypo$integrated_snn_res.0.35)
p1 <- DimPlot(hypo, shuffle = TRUE, group.by = "sub_clusters", label = TRUE) + ggtitle("Hypoblast subclusters")
p2 <- DimPlot(hypo, split.by = "orig.ident", group.by = "sub_clusters")
p3 <- DimPlot(hypo, group.by = "Phase", shuffle = TRUE)
p4 <- FeaturePlot(hypo, features = c("nFeature_RNA", "nCount_RNA"))
print(p1 + p2 + p3)

print(p4)

print(VlnPlot(hypo, group.by = "sub_clusters", features = c("nFeature_RNA", "nCount_RNA")))

# Marker identification
DefaultAssay(hypo) <- "RNA"
Idents(hypo) <- hypo$sub_clusters
hypo <- ScaleData(hypo, verbose = FALSE)

hypo_markers <- FindAllMarkers(hypo, only.pos = TRUE, verbose = FALSE)
hypo_markers$pct.diff <- hypo_markers$pct.1 - hypo_markers$pct.2
hypo_markers <- hypo_markers %>%
  filter(p_val_adj < 1e-10 & abs(pct.diff) > 0.2)


top30_markers_hypo <- hypo_markers %>% 
  group_by(cluster) %>%
    slice_max(n = 30, order_by = avg_log2FC)

print(DoHeatmap(hypo, features = top30_markers_hypo$gene) + NoLegend())

Having identified both intriguing and redundant sub-clusters, we next removed the cells filtered in the individual compartment analyses and consolidated our new annotation into the reported cell types. Note that the UMAPs in this notebook may look different from the published report. Differential gene expression results highlight the similarity between sub-clusters within either compartment, while some characteristic genes still distinguish them from the larger total.

# Subset after additional QC
hEE <- subset(merge.integrated, cells = c(colnames(epi), colnames(hypo)))
DefaultAssay(hEE) <- "RNA"

anno_object <- merge(epi, hypo)

# Integration
hEE <- SplitObject(hEE, split.by = "orig.ident")

hEE <- lapply(X = hEE, FUN = function(x) {
    x <- NormalizeData(x, verbose = FALSE)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
})

hEE_features <- SelectIntegrationFeatures(object.list = hEE)

hEE <- lapply(X = hEE, FUN = function(x) {
    x <- ScaleData(x, features = hEE_features, verbose = FALSE)
    x <- RunPCA(x, features = hEE_features, verbose = FALSE)
})

hEE_anchors <- FindIntegrationAnchors(object.list = hEE, anchor.features = hEE_features, reduction = "rpca", verbose = FALSE)
hEE <- IntegrateData(anchorset = hEE_anchors, verbose = FALSE)

rm(hEE_anchors, hEE_features)
gc(verbose=FALSE)
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    8127161   434.1   12796312   683.4   12796312   683.4
## Vcells 1522426716 11615.2 3198053999 24399.3 3096016579 23620.8
# Preprocessing
DefaultAssay(hEE) <- "integrated"
hEE <- ScaleData(hEE, vars.to.regress = c("percent.mt", "CC.Difference"), verbose = FALSE)
hEE <- RunPCA(hEE, npcs = 30, verbose = FALSE)
hEE <- RunUMAP(hEE, reduction = "pca", dims = 1:30, verbose = FALSE)

# Transfer annotation
hEE$sub_clusters <- anno_object$sub_clusters
md <- hEE@meta.data

md <- md %>%
  mutate(Cell_type = case_when(sub_clusters == "Epi_0" ~ "PI-Epi",
                           sub_clusters == "Epi_1" ~ "PI-Epi",
                           sub_clusters == "Epi_2" ~ "PI-Epi",
                           sub_clusters == "Epi_3" ~ "PI-Epi.L",
                           sub_clusters == "Epi_4" ~ "AME",
                           sub_clusters == "Epi_5" ~ "PS-like",
                           sub_clusters == "Epi_6" ~ "Meso-like",
                           sub_clusters == "Hypo_0" ~ "G2M/S Hypo",
                           sub_clusters == "Hypo_1" ~ "G1 Hypo",
                           sub_clusters == "Hypo_2" ~ "G1 Hypo",
                           sub_clusters == "Hypo_3" ~ "AVE-like"))

hEE@meta.data <- md

rm(hypo, epi, merge.integrated, anno_object)
gc(verbose=FALSE)
##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   8126572  434.1   12796312   683.4   12796312   683.4
## Vcells 641450297 4893.9 2558443200 19519.4 3096016579 23620.8
cluster_colors <- c("#85C1E9", "#B03A2E", "#F1948A", "#AF7AC5", "#F5B041", "#808B96", "#2471A3", "#229954")
cluster_order <- c("PI-Epi", "AME", "PI-Epi.L", "PS-like", "Meso-like", "G1 Hypo", "G2M/S Hypo", "AVE-like")
anno <- data.frame(colors = cluster_colors, order = cluster_order)

print(DimPlot(hEE) + scale_color_manual(values = anno$colors, breaks = anno$order)) # Final UMAP

# Marker identification
Idents(hEE) <- hEE$Cell_type
DefaultAssay(hEE) <- "RNA"
hEE <- NormalizeData(hEE, verbose = FALSE)
hEE <- ScaleData(hEE, verbose = FALSE)

hEE_markers <- FindAllMarkers(hEE, only.pos = TRUE, verbose = FALSE)
hEE_markers$pct.diff <- hEE_markers$pct.1 - hEE_markers$pct.2
hEE_markers <- hEE_markers %>%
  filter(p_val_adj < 1e-10 & abs(pct.diff) > 0.2)

top30_hEE_markers <- hEE_markers %>% 
  group_by(cluster) %>%
    slice_max(n = 30, order_by = avg_log2FC)

print(DoHeatmap(hEE, features = top30_hEE_markers$gene) + NoLegend())

Downstream analyses

Following the consolidation of stable cell types, we next directed our attention to two focused issues: characterization of the AME sub-cluster in comparison to the main Epiblast population, and disambiguation of the Hypoblast cluster as to its extra-embryonic character, considering the similarity in expression between the extra-embryonic Hypoblast and Definitive Endoderm, an embryonic lineage. For the former, differential gene expression revealed upregulation of amnion markers such as BMP4 and ISL1 in this population, consistent with our classification.

ave_hypo <- subset(hEE, idents = c("AME", "PI-Epi"))
DefaultAssay(ave_hypo) <- "RNA"
ave_markers <- FindMarkers(ave_hypo, ident.1 = "AME")

set <- c("TFAP2A", "PCAT14", "FAM20A", "ID1", "ID4", "MSX2", "NKX1-2", "BMP4", "ISL1", "TPM1", "SEPHS1", "VIM", "LINC00458", "CD24", "AS1", "FGFBP3", "AK4", "FOXD3") # Genes to highlight due to label proximity

p1 <- EnhancedVolcano(ave_markers,
    lab = rownames(ave_markers),
    x = 'avg_log2FC',
    y = 'p_val_adj', title = "", subtitle = "AME vs. PI-Epi", FCcutoff = 0.4, selectLab = set)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-
## zero p-value...
print(p1)

As for the Hypoblast, we selected a set of Definitive Endoderm (DE) and Primitive Endoderm (PE) markers and calculated a module score: a measure of average expression for a group of genes relative to a random genes with similar levels of expression across the entire sample as background. A score of 0 means the selected genes have about average expression, while positive or negative scores denote upregulation and downregulation respectively. By subtracting one score from the other, we obtained a relative module score of PE vs. DE identity, which unambiguously favored PE in the Hypoblast compartment sub-clusters.

# Calculate module score for Definitive Endoderm markers
hEE <- AddModuleScore(hEE, features = list(c("CD48", "PTN", "KIT", "EGF")), name = "DE_markers")
summary(hEE$DE_markers1)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.125249 -0.076777 -0.057994  0.003517  0.075428  0.720453
FeaturePlot(hEE, features = "DE_markers1", order = TRUE)

# Calculate module score for Primitive Endoderm markers
hEE <- AddModuleScore(hEE, features = list(c("APOC1", "PODXL", "VCAN", "APOE")), name = "PE_markers")
summary(hEE$PE_markers1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.0970 -0.2743  0.1310  0.3812  1.0715  2.5584
FeaturePlot(hEE, features = "PE_markers1", order = TRUE)

# Calculate difference for relative score
hEE$Endo_score <- hEE$PE_markers1 - hEE$DE_markers1
summary(hEE$Endo_score)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.1867 -0.2848  0.1386  0.3776  1.0791  2.4352
FeaturePlot(hEE, features = "Endo_score", order = TRUE)

# Binarize score
hEE$Endo_character <- ifelse(hEE$Endo_score > 0, "PE", "DE") # If score is positive, PE is greater than DE, and vice-versa

print(table(hEE$Endo_character, hEE$Cell_type))
##     
##       AME AVE-like G1 Hypo G2M/S Hypo Meso-like PI-Epi PI-Epi.L PS-like
##   DE  348       12      25         45        83   6357      903     292
##   PE  178      411    4135       4024        45    936      191      43
# Proportion of cells classified as either PE or DE in the Hypoblast
df <- hEE@meta.data[hEE$Cell_type == "G1 Hypo" | hEE$Cell_type == "G2M/S Hypo" | hEE$Cell_type == "AVE-like", ] # Subset metadata
t <- as.data.frame.matrix(t(prop.table(table(df$Endo_character, df$Cell_type), margin = 2))) # proportion table of DE vs. PE in the Hypoblast sub-clusters
print(t)
##                     DE        PE
## AVE-like   0.028368794 0.9716312
## G1 Hypo    0.006009615 0.9939904
## G2M/S Hypo 0.011059228 0.9889408
print(ggplot(df) + geom_bar(aes(x = Endo_character, y = ..prop.., group = 1, fill = Endo_character), stat = "count") + scale_y_continuous(labels = scales::percent_format()) + facet_wrap( ~ Cell_type) + theme_minimal() + ylab("Proportion") + xlab("Relative Marker Expression"))

hEE$Cell_type <- factor(hEE$Cell_type, levels = anno$order)

# Boxplots of scores
p1 <- ggplot(hEE@meta.data, aes(x = Cell_type, y = DE_markers1, fill = Cell_type)) + 
  geom_boxplot(outlier.shape = NA) + scale_fill_manual(values=as.character(anno$colors), breaks = anno$order) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust=1)) + ylab("DE Markers") + xlab("Cell type") + geom_jitter(shape=".", position=position_jitter(0.2)) + ylim(-1.3, 2.7) + NoLegend()

p2 <- ggplot(hEE@meta.data, aes(x = Cell_type, y = PE_markers1, fill = Cell_type)) + 
  geom_boxplot(outlier.shape = NA) + scale_fill_manual(values=as.character(anno$colors), breaks = anno$order) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust=1)) + ylab("PE Markers") + xlab("Cell type") + geom_jitter(shape=".", position=position_jitter(0.2)) + ylim(-1.3, 2.7) + NoLegend()

p3 <- ggplot(hEE@meta.data, aes(x = Cell_type, y = Endo_score, fill = Cell_type)) + 
  geom_boxplot(outlier.shape = NA) + scale_fill_manual(values=as.character(anno$colors), breaks = anno$order) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust=1)) + ylab("Relative Marker Expression") + xlab("Cell type") + geom_jitter(shape=".", position=position_jitter(0.2)) + ylim(-1.3, 2.7)

print(p1+p2+p3)

We also sought to infer the gene expression patterns defining the expected transition from Epiblast, to primitive streak-like, to mesoderm-like populations. Though scRNA-seq is static data, capturing a snapshot of cells at a point in time, the presence of related cell types can still be used to reconstruct something like a developmental trajectory. For this purpose, we used pseudotime in a subset of PI-Epi, PS-like and Meso-like cells in an attempt to capture transcriptional signatures that resemble EMT in mesoderm formation, such as transitory expression of MESP2 and TBXT, and increasing expression of CDH2, LHX1 and HAND1.

# Subset
ps_emt <- subset(hEE, idents = c("PS-like", "Meso-like", "PI-Epi"))
ps_emt$Cell_type <- factor(ps_emt$Cell_type, levels = c("PI-Epi", "PS-like", "Meso-like")) # Set cluster order

# Preprocess
ps_emt <- ps_emt %>% 
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(verbose = FALSE)

# Rank along PC1
pca <- as.data.frame(ps_emt@reductions$pca@cell.embeddings)
ps_emt$pc_1 <- pca[,"PC_1"]
ps_emt$pseudotime_pc1 <- rank(ps_emt$pc_1)

p1 <- DimPlot(ps_emt, reduction = "pca") + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) # Epiblast cells far exceed the other two, prompting us to randomly downsample to comparable size

print(p1)

set.seed(42)
keep <- c(sample(colnames(subset(ps_emt, Cell_type == "PI-Epi")), size = (259+128), replace=F), colnames(subset(ps_emt, Cell_type == "PS-like" | Cell_type == "Meso-like"))) 
ps_emt <- ps_emt[, keep]

ps_emt <- ps_emt %>% 
  NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(verbose = FALSE)

p2 <- DimPlot(ps_emt, reduction = "pca")

pca <- as.data.frame(ps_emt@reductions$pca@cell.embeddings)
ps_emt$pc_1 <- pca[, "PC_1", drop = FALSE]
ps_emt$pseudotime <- rank(-ps_emt$pc_1)

p3 <- ggplot(ps_emt@meta.data, aes(x = pseudotime,
                          y = Cell_type, 
                          colour = Cell_type)) +
    geom_quasirandom(groupOnX = FALSE) + theme_classic() +
    xlab("PC1") + ylab("Cell type") +
    ggtitle("Cells ordered by PC1") + scale_color_manual(values=as.character(anno$colors), breaks = anno[anno$order %in% c("PS-like", "Meso-like", "PI-Epi"),]$order) + NoLegend()

print(p2 + p3)

sce <- slingshot(as.matrix(ps_emt@reductions$pca@cell.embeddings), clusterLabels = ps_emt$Cell_type)  
## Using full covariance matrix
# Plot Slingshot pseudotime vs cell stage. 
print(ggplot(ps_emt@meta.data, aes(x = slingPseudotime(sce), y = Cell_type, 
                              colour = Cell_type)) +
    geom_quasirandom(groupOnX = FALSE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + theme_classic() +
    xlab("Slingshot pseudotime") + ylab("Timepoint") +
    ggtitle("Cells ordered by Slingshot pseudotime"))

ps_emt$pseudotime_sling <- slingPseudotime(sce)

ps_sce <- as.SingleCellExperiment(ps_emt)

p1 <- plotExpression(ps_sce, c("CDH1"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p2 <- plotExpression(ps_sce, c("CDH2"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p3 <- plotExpression(ps_sce, c("TBXT"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p4 <- plotExpression(ps_sce, c("SNAI1"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p5 <- plotExpression(ps_sce, c("SNAI2"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p6 <- plotExpression(ps_sce, c("MMP2"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p7 <- plotExpression(ps_sce, c("MMP14"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p8 <- plotExpression(ps_sce, c("VIM"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p9 <- plotExpression(ps_sce, c("ZEB2"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p10 <- plotExpression(ps_sce, c("MESP1"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p11 <- plotExpression(ps_sce, c("LHX1"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime")+ NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p12 <- plotExpression(ps_sce, c("HAND1"), x = "pseudotime_sling", 
               colour_by = "Cell_type", show_violin = FALSE,
               show_smooth = TRUE) + scale_color_manual(values=as.character(anno$colors), breaks = anno$order) + xlab("Pseudotime") + NoLegend()
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p_list <- list(p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12)

do.call("grid.arrange", c(p_list, ncol = 3)) 
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Conclusion

In this notebook, we have documented the decisions made, and underlying rationale, in establishing our single-cell data in terms of its quality control, low-dimensional projection, and cell type assignment, as well as demonstrated some of the analyses performed in the original manuscript. Remaining analyses are also available in the article GitHub repository under https://github.com/Smith-Lab-YSCC/hExtra_embryoid. Please contact , or , if you have questions or concerns about this analysis or its data.

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scater_1.18.6               SingleCellExperiment_1.12.0
##  [3] SummarizedExperiment_1.20.0 Biobase_2.50.0             
##  [5] GenomicRanges_1.42.0        GenomeInfoDb_1.26.7        
##  [7] IRanges_2.24.1              S4Vectors_0.28.1           
##  [9] BiocGenerics_0.36.1         MatrixGenerics_1.2.1       
## [11] slingshot_1.8.0             princurve_2.1.6            
## [13] ggbeeswarm_0.6.0            EnhancedVolcano_1.8.0      
## [15] ggrepel_0.9.1               clustree_0.5.0             
## [17] ggraph_2.0.5                matrixStats_0.62.0         
## [19] dplyr_1.0.9                 gridExtra_2.3              
## [21] ggplot2_3.3.6               sp_1.4-7                   
## [23] SeuratObject_4.1.0          Seurat_4.1.1               
## [25] rgl_0.110.2                
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                reticulate_1.24          
##   [3] tidyselect_1.1.2          htmlwidgets_1.5.4        
##   [5] grid_4.0.3                BiocParallel_1.24.1      
##   [7] Rtsne_0.16                munsell_0.5.0            
##   [9] codetools_0.2-18          ica_1.0-3                
##  [11] future_1.27.0             miniUI_0.1.1.1           
##  [13] withr_2.5.0               spatstat.random_2.2-0    
##  [15] colorspace_2.0-3          progressr_0.10.1         
##  [17] highr_0.9                 ggalt_0.4.0              
##  [19] knitr_1.39                rstudioapi_0.13          
##  [21] ROCR_1.0-11               tensor_1.5               
##  [23] Rttf2pt1_1.3.10           listenv_0.8.0            
##  [25] labeling_0.4.2            GenomeInfoDbData_1.2.4   
##  [27] polyclip_1.10-0           farver_2.1.0             
##  [29] parallelly_1.32.1         vctrs_0.4.1              
##  [31] generics_0.1.3            xfun_0.30                
##  [33] R6_2.5.1                  graphlayouts_0.8.0       
##  [35] rsvd_1.0.5                bitops_1.0-7             
##  [37] spatstat.utils_2.3-0      cachem_1.0.6             
##  [39] DelayedArray_0.16.3       assertthat_0.2.1         
##  [41] promises_1.2.0.1          scales_1.2.0             
##  [43] rgeos_0.5-9               beeswarm_0.4.0           
##  [45] gtable_0.3.0              beachmat_2.6.4           
##  [47] ash_1.0-15                globals_0.15.1           
##  [49] goftest_1.2-3             tidygraph_1.2.1          
##  [51] rlang_1.0.2               splines_4.0.3            
##  [53] extrafontdb_1.0           lazyeval_0.2.2           
##  [55] checkmate_2.1.0           spatstat.geom_2.4-0      
##  [57] yaml_2.3.5                reshape2_1.4.4           
##  [59] abind_1.4-5               backports_1.4.1          
##  [61] httpuv_1.6.5              extrafont_0.18           
##  [63] tools_4.0.3               ellipsis_0.3.2           
##  [65] spatstat.core_2.4-2       jquerylib_0.1.4          
##  [67] RColorBrewer_1.1-3        ggridges_0.5.3           
##  [69] Rcpp_1.0.8.3              plyr_1.8.7               
##  [71] sparseMatrixStats_1.2.1   base64enc_0.1-3          
##  [73] zlibbioc_1.36.0           purrr_0.3.4              
##  [75] RCurl_1.98-1.6            rpart_4.1.16             
##  [77] deldir_1.0-6              pbapply_1.5-0            
##  [79] viridis_0.6.2             cowplot_1.1.1            
##  [81] zoo_1.8-10                cluster_2.1.3            
##  [83] magrittr_2.0.3            RSpectra_0.16-1          
##  [85] data.table_1.14.2         scattermore_0.8          
##  [87] lmtest_0.9-40             RANN_2.6.1               
##  [89] fitdistrplus_1.1-8        patchwork_1.1.1          
##  [91] mime_0.12                 evaluate_0.15            
##  [93] xtable_1.8-4              compiler_4.0.3           
##  [95] tibble_3.1.6              maps_3.4.0               
##  [97] crayon_1.5.1              KernSmooth_2.23-20       
##  [99] htmltools_0.5.2           mgcv_1.8-40              
## [101] later_1.3.0               tidyr_1.2.0              
## [103] DBI_1.1.3                 tweenr_1.0.2             
## [105] proj4_1.0-11              MASS_7.3-56              
## [107] Matrix_1.4-1              cli_3.1.1                
## [109] igraph_1.3.0              pkgconfig_2.0.3          
## [111] plotly_4.10.0             scuttle_1.0.4            
## [113] spatstat.sparse_2.1-1     vipor_0.4.5              
## [115] bslib_0.4.0               XVector_0.30.0           
## [117] stringr_1.4.0             digest_0.6.29            
## [119] sctransform_0.3.3         RcppAnnoy_0.0.19         
## [121] spatstat.data_2.2-0       rmarkdown_2.14           
## [123] leiden_0.4.2              uwot_0.1.11              
## [125] DelayedMatrixStats_1.12.3 shiny_1.7.2              
## [127] lifecycle_1.0.1           nlme_3.1-157             
## [129] jsonlite_1.8.0            BiocNeighbors_1.8.2      
## [131] limma_3.46.0              viridisLite_0.4.0        
## [133] fansi_1.0.3               pillar_1.8.0             
## [135] lattice_0.20-45           ggrastr_1.0.1            
## [137] fastmap_1.1.0             httr_1.4.3               
## [139] survival_3.3-1            glue_1.6.2               
## [141] png_0.1-7                 ggforce_0.3.3            
## [143] stringi_1.7.6             sass_0.4.1               
## [145] BiocSingular_1.6.0        irlba_2.3.5              
## [147] future.apply_1.9.0        ape_5.6-2